*** 
*** Code for "The Labor Market Integration of Refugee Migrants in High-Income Countries"
*** Courtney Brell, Christian Dustmann, and Ian Preston
***
*** Analysis of the German Socio-Economic Panel
*** This file should be run in the folder containing the SOEP data
***

********************************************************************************
*** Preliminaries

* Start from an empty session so nothing left in memory interferes with the run
clear all

* Import data
* Load the person-level file and attach the matching ppathl record to every
* person-year (pid syear). Paths are relative to the working directory and use
* forward slashes, which Stata accepts on Windows, macOS and Unix alike
* (backslashes only work as a separator on Windows).
use "stata_de+en/pl.dta", clear
* assert(match using) aborts the run if any row exists only in pl.dta;
* keep(match) then discards ppathl rows that have no counterpart in pl.dta.
merge 1:1 pid syear using "stata_de+en/ppathl.dta", keep(match) assert(match using) nogen
* Activate the label set named EN (presumably the English labels)
label language EN

* Demographic variables
* Country of origin, displayed with the corigin_EN value labels
gen countryoforigin = corigin
label values countryoforigin corigin_EN

* Female indicator: 1 when sex is 2, 0 when sex is 1, missing for any other code
gen female = (sex == 2) if inlist(sex, 1, 2)

* Year of birth and age reached in the survey year
gen birthyear = gebjahr
gen age = syear - birthyear

* Years elapsed between the year of immigration and the survey year
gen yearssincearrive = syear - immiyear

* Classify respondents by the survey subsample (sample1) they were drawn from.
* Each flag equals 1 for members of the listed subsamples and is missing otherwise.
gen refugee   = 1 if inlist(sample1, 30, 31, 34)
gen native    = 1 if inlist(sample1, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 20, 21, 33)
gen immigrant = 1 if inlist(sample1, 24, 29)

* Remove cases whose country of origin contradicts their subsample:
* refugees/immigrants with country of origin 1 (German-born) ...
drop if countryoforigin == 1 & refugee == 1
drop if countryoforigin == 1 & immigrant == 1
* ... and natives whose country of origin is anything other than 1
drop if native == 1 & countryoforigin != 1

* Discard every survey year before 2013. The stated aim is to trim the native
* sample so it better matches the other samples, but the condition applies to
* all rows regardless of migrant type.
drop if syear < 2013

* Remove members of the "other immigrant" sample who are refugees (arefback 2 or 3)
drop if immigrant == 1 & inlist(arefback, 2, 3)

* Single categorical migrant-type variable built from the three flags.
* Should more than one flag be set, immigrant takes precedence over refugee,
* which takes precedence over native; rows with no flag stay missing.
label define Lmigrant 0 "Native" 1 "Refugee" 2 "Other immigrant"
gen migranttype = cond(immigrant == 1, 2, cond(refugee == 1, 1, cond(native == 1, 0, .)))
label values migranttype Lmigrant

* Natives have no arrival date: fix their years since arrival at 0
replace yearssincearrive = 0 if migranttype == 0

* Sample of interest
* Keep only respondents that were assigned a migrant type
drop if missing(migranttype)
* Natives' yearssincearrive was already set to 0 immediately after migranttype
* was created, so that assignment is not repeated here.
* Ages 20 to 64 inclusive (inrange() is false for missing values, so rows with
* missing age are dropped)
keep if inrange(age, 20, 64)
* At most 10 years since arrival; rows with negative or missing values are dropped
keep if inrange(yearssincearrive, 0, 10)

* Use survey weights
* phrf is used as the individual weight in every weighted statistic below
gen indweight = phrf

********************************************************************************
*** Calculate labor market outcomes

* Employment
* Codes 1 and 2 of plb0022_h count as employed (1), codes 3 to 10 as not
* employed (0); every other value becomes missing.
recode plb0022_h (1 2 = 1) (3 4 5 6 7 8 9 10 = 0) (else = .), gen(employment)

* Wages
gen nominalwage = plc0014_h // Net monthly income
* Keep strictly positive incomes and leave out anyone coded as not employed.
* NOTE(review): rows with missing employment status are not excluded by this
* condition -- confirm that is intended.
gen wage = nominalwage if employment != 0 & nominalwage > 0

* Price index by survey year (2015 = 100). Years outside 2013-2017 keep a
* missing index and therefore end up with a missing real wage.
gen index = .
local yr = 2013
foreach level in 98.59416 99.48821 100 100.4917 102.0087 {
	replace index = `level' if syear == `yr'
	local ++yr
}

* Deflate to real terms
replace wage = wage * 100 / index
* Rescale from a monthly to a weekly amount (12 months over 365.2425 days, times 7)
replace wage = wage * 12 * 7 / 365.2425

* Indicators for nonmissing outcomes, summed later to obtain cell sizes
gen Nemp = (employment < .)
gen Ninc = (wage < .)

* Build the output file "DE-SOEP": weighted mean employment and wage plus
* unweighted counts of nonmissing observations, by years since arrival and
* migrant type -- once split by gender and once pooled over gender.

* Step 1: cells by gender, held in a temporary file.
* Rows with missing female are left out here: they would otherwise form cells
* with female==. that cannot be told apart from the pooled rows added in step 2
* (those rows are identified by a missing female).
preserve
collapse (mean) employment avg_income=wage (rawsum) Nemp Ninc if !missing(female) [aw=indweight], by(female yearssincearrive migranttype)
tempfile bygender
save `bygender'
restore

* Step 2: pooled cells (female is missing on these rows after the append),
* combined with the by-gender cells and written out once, in final form.
preserve
collapse (mean) employment avg_income=wage (rawsum) Nemp Ninc [aw=indweight], by(yearssincearrive migranttype)
append using `bygender'
order yearssincearrive migranttype female employment Nemp avg_income Ninc
sort migranttype female yearssincearrive
save "DE-SOEP", replace
restore

********************************************************************************
*** Calculate sample descriptives

* Print descriptive statistics separately for each migrant type
* (0 = Native, 1 = Refugee, 2 = Other immigrant). Each pass works on a
* preserved copy of the data, so the full sample is back in memory afterwards.
forvalues loopmig = 0/2 {
	preserve
	display `loopmig'
	keep if migranttype == `loopmig'

	* # Observations
	count
	* # Unique individuals: tag one row per person and tabulate the tag.
	* pid is the person identifier this file already relies on as merge key.
	egen persTag = tag(pid)
	tab persTag

	* Gender
	sum female [aw=indweight]
	* Age
	sum age [aw=indweight], detail

	* Time since arrival
	sum yearssincearrive [aw=indweight], detail

	* Age at arrival
	gen age_at_arrival = age - yearssincearrive
	sum age_at_arrival [aw=indweight], detail

	* Year of arrival
	gen year_of_arrival = syear - yearssincearrive
	sum year_of_arrival [aw=indweight], detail

	* LM outcomes
	count if !missing(employment)
	sum employment [aw=indweight], detail
	count if !missing(wage)
	sum wage [aw=indweight], detail

	* Country of origin: weighted share of each country within the current
	* migrant type. The variable names say "refugees" but the same code runs
	* for all three types. A temporary variable supplies the constant being
	* summed, so it cannot collide with an existing variable name.
	tempvar one
	gen `one' = 1
	collapse (sum) numrefugees=`one' [iw=indweight], by(countryoforigin)
	egen totalrefugees = total(numrefugees)
	gen fracrefugees = numrefugees/totalrefugees
	* Largest shares first; list the top ten. The row counter is used instead
	* of "in 1/10" because a group may contain fewer than ten countries.
	gsort -fracrefugees
	gen thecounter = _n
	list countryoforigin fracrefugees if thecounter<=10
	restore
}

********************************************************************************
*** Clean up

* Remove the data and all other objects from memory now that the analysis is done
clear all
